1 Introduction

2 Visualization & Analysis

2.1 Data Cleaning

# make the pipe operator available before it is first used
library("magrittr")

# read in csv file
trees <- read.csv("./Burlington_Trees.csv")

# separate Geo.Point column to latitude and longitude
# and convert to numeric variables
trees <- trees %>%
  separate(Geo.Point, c("lat", "long"), sep = ",") %>%
  mutate(lat = as.numeric(lat),
         long = as.numeric(long))

# separate species column into genus, species
# Rows with no comma yield a single piece; fill = "right" states explicitly
# that the missing right-hand piece (species) becomes NA, which is what the
# default does anyway but without the "Expected 2 pieces" warning.
trees <- trees %>%
  separate(species, c("genus", "species"), sep = ",", fill = "right")
Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4170 rows [1, 4, 5, 7, 9, 13, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, ...].
# remove the " spp" included as a place holder for genus
trees$genus <- gsub(" spp", "", trees$genus)

# correct the misspelling of mapl to maple
# The negative lookahead only matches "mapl" when it is NOT already followed
# by "e", so correctly spelled "maple" values are left untouched and no
# second clean-up pass is needed.
trees$genus <- gsub("mapl(?!e)", "maple", trees$genus, perl = TRUE)

# convert zeros to NA so they will not be included in graphs
# in this case, zero values are due to lack of information, not lack of value,
# so all were converted to NA values to be filtered out later
# note: the comparison is applied to every column of the data frame,
# not only the numeric ones
# repeat for blank values
trees[trees == 0] <- NA
trees[trees == ""] <- NA

# convert the modified column to a zoo year-month value,
# parsing the existing text with the "%m/%Y" format
trees <- trees %>%
  mutate(modified = as.yearmon(modified, "%m/%Y"))

# cleaned data frame
head(trees)
NA

2.2 Number of Trees by Species

```r
# number of trees by species
# histogram

<!-- rnb-source-end -->

<!-- rnb-chunk-end -->


<!-- rnb-text-begin -->



## Relationship between species abundance and Land Use


<!-- rnb-text-end -->


<!-- rnb-chunk-begin -->


<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuIyBsYW5kdXNlIHYgc3BlY2llc1xuIyBtdWx0aXBsZSBiYXIgY2hhcnRcbiMgc2VlIGlmIGNlcnRhaW4gdHlwZXMgb2YgdHJlZXMgYXJlIG1vcmUgY29tbW9uIGJ5IGJ1c2luZXNzLCByZXNpZGVudGlhbCwgZXRjLlxuXG4jIGZvcm1hdCB0aGUgZGF0YSBpbnRvIGEgbmV3IGRhdGEgZnJhbVxubGFuZHVzZV9ieV9nZW51cyA8LSB4dGFicyhmb3JtdWxhID0gfiBsYW5kdXNlICsgZ2VudXMsXG4gICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSB0cmVlcykgJT4lIFxuICBwcm9wLnRhYmxlKG1hcmdpbiA9IFwibGFuZHVzZVwiKSAlPiUgICMgY2FsY3VsYXRlcyBwcm9wb3J0aW9cbiAgXG4gICMgcGlwZSBpbnRvIGRhdGEuZnJhbWVcbiAgIyBVc2luZyBkYXRhLmZyYW1lKCkgdHVybnMgaXQgZnJvbSBhIHRhYmxlIGludG8gYSBkYXRhIGZyYW1lXG4gIGRhdGEuZnJhbWUoKSAlPiUgXG4gIGZpbHRlcihGcmVxID4gLjEpIFxuXG5sYW5kdXNlX2J5X2dlbnVzXG5gYGAifQ== -->

```r
# Land use vs. genus, shown as a stacked bar chart:
# are certain kinds of trees more common on business, residential, etc. land?

# Cross-tabulate land use against genus, turn the counts into proportions
# within each land use type, and keep only the genera whose proportion
# exceeds 0.1.
landuse_by_genus <- xtabs(~ landuse + genus, data = trees) %>%
  prop.table(margin = "landuse") %>%
  # data.frame() turns the table into a long data frame with a Freq column
  data.frame() %>%
  filter(Freq > 0.1)

landuse_by_genus

landuse_species_bar <- ggplot(
  landuse_by_genus,
  aes(x = landuse, y = Freq, fill = genus)
) +
  # geom_col() draws the y values we supply instead of counting rows
  # (it is the same as geom_bar(stat = "identity")).
  # NOTE(review): position = "fill" rescales every bar to a total of 1, so
  # after the filter above the heights are shares among the retained genera
  # rather than the raw Freq values -- confirm this is intended.
  geom_col(color = "black", position = "fill") +
  labs(
    title = "Top Genus per Land Use Type",
    x = "Land Use Type",
    y = "Proportion"
  )

landuse_species_bar

2.3 The Trees of Burlington

2.4 Appraisal Estimates

```r
# diameter v appraisal
# scatterplot probably

```

3 Machine Learning

LS0tCnRpdGxlOiAiQSBTdGF0aXN0aWNhbCBhbmQgVmlzdWFsIEFuYWx5c2lzIG9mIE11bmljaXBhbGx5LU1haW50YWluZWQgVHJlZXMgaW4gdGhlIENpdHkgb2YgQnVybGluZ3RvbiwgVlQiCmF1dGhvcjogIklzYWJlbGxlIEZyYW5rZSwgQ2Fyb2xpbmUgR3JlZW4sIFdpbGwgR3Vpc2JvbmQiCmRhdGU6ICI0LzE1LzIwMjIiCm91dHB1dDoKICBwZGZfZG9jdW1lbnQ6CiAgICBudW1iZXJfc2VjdGlvbnM6IHllcwogIGh0bWxfbm90ZWJvb2s6CiAgICBudW1iZXJfc2VjdGlvbnM6IHllcwotLS0KIyBJbnRyb2R1Y3Rpb24KCgojIFZpc3VhbGl6YXRpb24gJiBBbmFseXNpcwoKIyMgRGF0YSBDbGVhbmluZwpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKbGlicmFyeSh6b28pCmxpYnJhcnkobWFwdmlldykKbGlicmFyeShnZ21hcCkKCnRoZW1lX3NldCh0aGVtZV9idygpKQoKYGBgCgpgYGB7ciBkYXRhX2NsZWFufQojIHJlYWQgaW4gY3N2IGZpbGUKdHJlZXMgPC0gcmVhZC5jc3YoIi4vQnVybGluZ3Rvbl9UcmVlcy5jc3YiKQpsaWJyYXJ5KCJtYWdyaXR0ciIpCgojIHNlcGFyYXRlIEdlby5Qb2ludCBjb2x1bW4gdG8gbGF0aXR1ZGUgYW5kIGxvbmdpdHVkZQojIGFuZCBjb252ZXJ0IHRvIG51bWVyaWMgdmFyaWFibGVzCnRyZWVzIDwtIHRyZWVzICU+JSAKICAgICAgICAgc2VwYXJhdGUoR2VvLlBvaW50LCBjKCJsYXQiLCAibG9uZyIpLCAiLCIpICU+JSAKICAgICAgICAgbXV0YXRlKGxhdCA9IGFzLm51bWVyaWMobGF0KSwKICAgICAgICAgICAgICAgIGxvbmcgPSBhcy5udW1lcmljKGxvbmcpKQoKIyBzZXBhcmF0ZSBzcGVjaWVzIGNvbHVtbiBpbnRvIGdlbnVzLCBzcGVjaWVzCnRyZWVzIDwtIHRyZWVzICU+JSAKICAgICAgICAgc2VwYXJhdGUoc3BlY2llcywgYygiZ2VudXMiLCAic3BlY2llcyIpLCAiLCIpCgojIHJlbW92ZSB0aGUgInNwcCIgaW5jbHVkZWQgYXMgYSBwbGFjZSBob2xkZXIgZm9yIGdlbnVzCnRyZWVzJGdlbnVzIDwtIGdzdWIoIiBzcHAiLCAiIiwgdHJlZXMkZ2VudXMpCgojIGNvcnJlY3QgdGhlIG1pc3NwZWxsaW5nIG9mIG1hcGwgdG8gbWFwbGUKdHJlZXMkZ2VudXMgPC0gZ3N1YigibWFwbCIsICJtYXBsZSIsIHRyZWVzJGdlbnVzLCBmaXhlZCA9IFRSVUUpCnRyZWVzJGdlbnVzIDwtIGdzdWIoIm1hcGxlZSIsICJtYXBsZSIsIHRyZWVzJGdlbnVzKQoKIyBjb252ZXJ0IHplcm9zIGluIG51bWVyaWMgY29sdW1ucyB0byBOQSBzbyB0aGV5IHdpbGwgbm90IGJlIGluY2x1ZGVkIGluIGdyYXBocwojIGluIHRoaXMgY2FzZSwgemVybyB2YWx1ZXMgYXJlIGR1ZSB0byBsYWNrIG9mIGluZm9ybWF0aW9uLCBub3QgbGFjayBvZiB2YWx1ZSwKIyBzbyBhbGwgd2VyZSBjb252ZXJ0ZWQgdG8gTkEgdmFsdWVzIHRvIGJlIGZpbHRlcmVkIG91dCBsYXRl
cgojIHJlcGVhdCBmb3IgYmxhbmsgdmFsdWVzCnRyZWVzW3RyZWVzID09IDBdIDwtIE5BCnRyZWVzW3RyZWVzID09ICIiXSA8LSBOQQoKIyBjb252ZXJ0IGRhdGVzIHRvIGJldHRlciBmb3JtYXQKdHJlZXMgPC0gdHJlZXMgJT4lIAogIG11dGF0ZShtb2RpZmllZCA9IGFzLnllYXJtb24obW9kaWZpZWQsICIlbS8lWSIpKQoKIyBjbGVhbmVkIGRhdGEgZnJhbWUKaGVhZCh0cmVlcykKCmBgYAoKIyMgTnVtYmVyIG9mIFRyZWVzIGJ5IFNwZWNpZXMKCmBgYHtyfQojIG51bWJlciBvZiB0cmVlcyBieSBzcGVjaWVzCiMgaGlzdG9ncmFtCmBgYAoKCiMjIFJlbGF0aW9uc2hpcCBiZXR3ZWVuIHNwZWNpZXMgYWJ1bmRhbmNlIGFuZCBMYW5kIFVzZQoKYGBge3J9CiMgbGFuZHVzZSB2IHNwZWNpZXMKIyBtdWx0aXBsZSBiYXIgY2hhcnQKIyBzZWUgaWYgY2VydGFpbiB0eXBlcyBvZiB0cmVlcyBhcmUgbW9yZSBjb21tb24gYnkgYnVzaW5lc3MsIHJlc2lkZW50aWFsLCBldGMuCgojIGZvcm1hdCB0aGUgZGF0YSBpbnRvIGEgbmV3IGRhdGEgZnJhbQpsYW5kdXNlX2J5X2dlbnVzIDwtIHh0YWJzKGZvcm11bGEgPSB+IGxhbmR1c2UgKyBnZW51cywKICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gdHJlZXMpICU+JSAKICBwcm9wLnRhYmxlKG1hcmdpbiA9ICJsYW5kdXNlIikgJT4lICAjIGNhbGN1bGF0ZXMgcHJvcG9ydGlvCiAgCiAgIyBwaXBlIGludG8gZGF0YS5mcmFtZQogICMgVXNpbmcgZGF0YS5mcmFtZSgpIHR1cm5zIGl0IGZyb20gYSB0YWJsZSBpbnRvIGEgZGF0YSBmcmFtZQogIGRhdGEuZnJhbWUoKSAlPiUgCiAgZmlsdGVyKEZyZXEgPiAuMSkgCgpsYW5kdXNlX2J5X2dlbnVzCgpsYW5kdXNlX3NwZWNpZXNfYmFyIDwtIGdncGxvdChkYXRhID0gbGFuZHVzZV9ieV9nZW51cywKICAgICAgIG1hcHBpbmcgPSBhZXMoeCA9IGxhbmR1c2UsCiAgICAgICAgICAgICAgICAgICAgIGZpbGwgPSBnZW51cywKICAgICAgICAgICAgICAgICAgICAgeSA9IEZyZXEpKSArIAogIAogICMgTm93IHdlIG5lZWQgdG8gdGVsbCBnZW9tX2JhcigpIHRvIG92ZXJyaWRlIGl0J2QgZGVmYXVsdCBjaG9pY2Ugb2YgeQogICMgV2UgZG8gdGhpcyB3aXRoIHN0YXQgPSAiaWRlbnRpdHkiIChpZGVudGl0eSBtZWFucyAiVXNlIHRoZSB5IEkgZ2F2ZSB5b3UhIikKICBnZW9tX2Jhcihjb2xvciA9ICJibGFjayIsCiAgICAgICAgICAgc3RhdCA9ICJpZGVudGl0eSIsCiAgICAgICAgICAgcG9zaXRpb24gPSAiZmlsbCIpICsgCiAgCiAgbGFicyh5ID0gIlByb3BvcnRpb24iLCAKICAgICAgIHRpdGxlID0gIlRvcCBHZW51cyBwZXIgTGFuZCBVc2UgVHlwZSIsCiAgICAgICB4ID0gIkxhbmQgVXNlIFR5cGUiKQoKCmxhbmR1c2Vfc3BlY2llc19iYXIKCmBgYAoKIyMgVGhlIFRyZWVzIG9mIEJ1cmxpbmd0b24KCmBgYHtyIGxhbmRfdXNlX2Rpc3RyaWJ9CiMgd2lsbAojIG1hcCBvZiBidXJsaW5ndG9uIHdpdGggcG9pbnRzIGFzIHRyZWVzIGFuZCBjb2xvcmVkIGJ5IGxhbmQgdXNlIHR5
cGUKCnRyZWVzX21hcCA8LSB0cmVlcyAlPiUKICBmaWx0ZXIoem9uZV9pZCA9PSAnd2FyZCA1JykKICBtYXBWaWV3KHRyZWVzX21hcCwgeGNvbCA9ICJsb25nIiwgeWNvbCA9ICJsYXQiLCB6Y29sID0gImFwcHJhaXNlIiwgY3JzID0gNDI2OSwgZ3JpZCA9IEZBTFNFKQoKCiMgbWFwIG9mIGJ1cmxpbmd0b24gd2l0aCBwb2ludHMgYXMgdHJlZXMgYW5kIGNvbG9yZWQgYnkgc3BlY2llcwojIGZhY2V0IHdyYXAgdGhlbSBuZXh0IHRvIGVhY2ggb3RoZXIhCgpgYGAKCiMjIEFwcHJhaXNhbCBFc3RpbWF0ZXMKCmBgYHtyfQojIGRpYW1ldGVyIHYgYXBwcmFpc2FsCiMgc2NhdHRlcnBsb3QgcHJvYmFibHkKCmBgYAoKCiMgTWFjaGluZSBMZWFybmluZwoKCg==